/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.analysis;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;

/**
 * SynonymFilter handles multi-token synonyms with variable position increment
 * offsets.
 * <p>
 * The matched tokens from the input stream may be optionally passed through
 * (includeOrig=true) or discarded. If the original tokens are included, the
 * position increments may be modified to retain absolute positions after
 * merging with the synonym tokenstream.
 * <p>
 * Generated synonyms will start at the same position as the first matched
 * source token.
 * 
 * @version $Id: SynonymFilter.java 1124242 2011-05-18 12:58:53Z rmuir $
 */
public final class SynonymFilter extends TokenFilter {

	private final SynonymMap map; // Map<String, SynonymMap>
	private Iterator<AttributeSource> replacement; // iterator over generated
													// tokens

	public SynonymFilter(TokenStream in, SynonymMap map) {
		super(in);
		if (map == null)
			throw new IllegalArgumentException("map is required");
		this.map = map;
		// just ensuring these attributes exist...
		addAttribute(CharTermAttribute.class);
		addAttribute(PositionIncrementAttribute.class);
		addAttribute(OffsetAttribute.class);
		addAttribute(TypeAttribute.class);
	}

	/*
	 * Need to worry about multiple scenarios: - need to go for the longest
	 * match a b => foo #shouldn't match if "a b" is followed by "c d" a b c d
	 * => bar - need to backtrack - retry matches for tokens already read a b c
	 * d => foo b c => bar If the input stream is "a b c x", one will consume
	 * "a b c d" trying to match the first rule... all but "a" should be pushed
	 * back so a match may be made on "b c". - don't try and match generated
	 * tokens (thus need separate queue) matching is not recursive. - handle
	 * optional generation of original tokens in all these cases, merging token
	 * streams to preserve token positions. - preserve original
	 * positionIncrement of first matched token
	 */
	@Override
	public boolean incrementToken() throws IOException {
		while (true) {
			// if there are any generated tokens, return them... don't try any
			// matches against them, as we specifically don't want recursion.
			if (replacement != null && replacement.hasNext()) {
				copy(this, replacement.next());
				return true;
			}

			// common case fast-path of first token not matching anything
			AttributeSource firstTok = nextTok();
			if (firstTok == null)
				return false;
			CharTermAttribute termAtt = firstTok
					.addAttribute(CharTermAttribute.class);
			SynonymMap result = map.submap != null ? map.submap.get(
					termAtt.buffer(), 0, termAtt.length()) : null;
			if (result == null) {
				copy(this, firstTok);
				return true;
			}

			// fast-path failed, clone ourselves if needed
			if (firstTok == this)
				firstTok = cloneAttributes();
			// OK, we matched a token, so find the longest match.

			matched = new LinkedList<AttributeSource>();

			result = match(result);

			if (result == null) {
				// no match, simply return the first token read.
				copy(this, firstTok);
				return true;
			}

			// reuse, or create new one each time?
			ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(
					result.synonyms.length + matched.size() + 1);

			//
			// there was a match... let's generate the new tokens, merging
			// in the matched tokens (position increments need adjusting)
			//
			AttributeSource lastTok = matched.isEmpty() ? firstTok : matched
					.getLast();
			boolean includeOrig = result.includeOrig();

			AttributeSource origTok = includeOrig ? firstTok : null;
			PositionIncrementAttribute firstPosIncAtt = firstTok
					.addAttribute(PositionIncrementAttribute.class);
			int origPos = firstPosIncAtt.getPositionIncrement(); // position of
																	// origTok
																	// in the
																	// original
																	// stream
			int repPos = 0; // curr position in replacement token stream
			int pos = 0; // current position in merged token stream

			for (int i = 0; i < result.synonyms.length; i++) {
				Token repTok = result.synonyms[i];
				AttributeSource newTok = firstTok.cloneAttributes();
				CharTermAttribute newTermAtt = newTok
						.addAttribute(CharTermAttribute.class);
				OffsetAttribute newOffsetAtt = newTok
						.addAttribute(OffsetAttribute.class);
				PositionIncrementAttribute newPosIncAtt = newTok
						.addAttribute(PositionIncrementAttribute.class);

				OffsetAttribute lastOffsetAtt = lastTok
						.addAttribute(OffsetAttribute.class);

				newOffsetAtt.setOffset(newOffsetAtt.startOffset(),
						lastOffsetAtt.endOffset());
				newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
				repPos += repTok.getPositionIncrement();
				if (i == 0)
					repPos = origPos; // make position of first token equal to
										// original

				// if necessary, insert original tokens and adjust position
				// increment
				while (origTok != null && origPos <= repPos) {
					PositionIncrementAttribute origPosInc = origTok
							.addAttribute(PositionIncrementAttribute.class);
					origPosInc.setPositionIncrement(origPos - pos);
					generated.add(origTok);
					pos += origPosInc.getPositionIncrement();
					origTok = matched.isEmpty() ? null : matched.removeFirst();
					if (origTok != null) {
						origPosInc = origTok
								.addAttribute(PositionIncrementAttribute.class);
						origPos += origPosInc.getPositionIncrement();
					}
				}

				newPosIncAtt.setPositionIncrement(repPos - pos);
				generated.add(newTok);
				pos += newPosIncAtt.getPositionIncrement();
			}

			// finish up any leftover original tokens
			while (origTok != null) {
				PositionIncrementAttribute origPosInc = origTok
						.addAttribute(PositionIncrementAttribute.class);
				origPosInc.setPositionIncrement(origPos - pos);
				generated.add(origTok);
				pos += origPosInc.getPositionIncrement();
				origTok = matched.isEmpty() ? null : matched.removeFirst();
				if (origTok != null) {
					origPosInc = origTok
							.addAttribute(PositionIncrementAttribute.class);
					origPos += origPosInc.getPositionIncrement();
				}
			}

			// what if we replaced a longer sequence with a shorter one?
			// a/0 b/5 => foo/0
			// should I re-create the gap on the next buffered token?

			replacement = generated.iterator();
			// Now return to the top of the loop to read and return the first
			// generated token.. The reason this is done is that we may have
			// generated
			// nothing at all, and may need to continue with more matching
			// logic.
		}
	}

	//
	// Defer creation of the buffer until the first time it is used to
	// optimize short fields with no matches.
	//
	private LinkedList<AttributeSource> buffer;
	private LinkedList<AttributeSource> matched;

	private boolean exhausted;

	private AttributeSource nextTok() throws IOException {
		if (buffer != null && !buffer.isEmpty()) {
			return buffer.removeFirst();
		} else {
			if (!exhausted && input.incrementToken()) {
				return this;
			} else {
				exhausted = true;
				return null;
			}
		}
	}

	private void pushTok(AttributeSource t) {
		if (buffer == null)
			buffer = new LinkedList<AttributeSource>();
		buffer.addFirst(t);
	}

	private SynonymMap match(SynonymMap map) throws IOException {
		SynonymMap result = null;

		if (map.submap != null) {
			AttributeSource tok = nextTok();
			if (tok != null) {
				// clone ourselves.
				if (tok == this)
					tok = cloneAttributes();
				// check for positionIncrement!=1? if>1, should not match,
				// if==0, check multiple at this level?
				CharTermAttribute termAtt = tok
						.getAttribute(CharTermAttribute.class);
				SynonymMap subMap = map.submap.get(termAtt.buffer(), 0,
						termAtt.length());

				if (subMap != null) {
					// recurse
					result = match(subMap);
				}

				if (result != null) {
					matched.addFirst(tok);
				} else {
					// push back unmatched token
					pushTok(tok);
				}
			}
		}

		// if no longer sequence matched, so if this node has synonyms, it's the
		// match.
		if (result == null && map.synonyms != null) {
			result = map;
		}

		return result;
	}

	private void copy(AttributeSource target, AttributeSource source) {
		if (target != source)
			source.copyTo(target);
	}

	@Override
	public void reset() throws IOException {
		input.reset();
		replacement = null;
		exhausted = false;
	}
}
